################################################################################ 
################################################################################ 
#################### Meta-analysis on the Contact Hypothesis ###################
################################################################################ 
################################################################################ 

# This script creates descriptive statistics
# for the project 
# Meta-analysis on the Contact Hypothesis
# by Gwen-Jiro Clochard


################################################################################ 
################################## Import data #################################
################################################################################ 


all_measures <- 
  read_excel(paste(pathdata, "Clean//All_measures.xlsx", sep="//"))

indiv <- 
  read_excel(paste(pathdata, "Clean//Individual_papers.xlsx", sep="//"))


################################################################################ 
################### Cumulative distribution publication year ###################
################################################################################ 


plt <-
  ggplot(indiv, aes(x = Year)) +
  stat_ecdf(geom = "step", color = "blue") +
  labs(x = "Year", y = "Cumulative proportion") +
  theme_classic()

plt

ggsave(
  plot = plt,
  filename = paste(pathfigures, "trend_year.pdf", sep="//"),
  width = 7,
  height = 5
)



################################################################################ 
############################## Descriptive papers ##############################
################################################################################ 


##### Publication #####

descriptive_publication <-
  rbind(
    as.data.frame(table(indiv$year_category)),
    as.data.frame(table(indiv$Published)),
    as.data.frame(table(indiv$registration)),
    as.data.frame(table(indiv$citation_category))
  )

print(xtable(descriptive_publication,
             caption = "Descriptive statistics of papers"), 
      hline.after = NULL,
      include.colnames = FALSE,
      add.to.row = list(pos = list(0, 4, 6, 8, nrow(descriptive_publication)),
                        command = c('\\toprule\n', '\\midrule\n', '\\midrule\n', '\\midrule\n', '\\bottomrule\n')),
      include.rownames = FALSE
)


##### Trends in pre-registration #####

table_trends <- indiv %>%
  group_by(year_category) %>%
  summarise(
    perc_preregistered = round(mean(registration, na.rm = TRUE) * 100, 1),
    N = n()
  )

print(xtable(table_trends,
       caption = "Trends pre-registration",
       label = "tab:trends_preregistration"),
       include.rownames = FALSE)


##### Experimental design #####

descriptive_exp <-
  rbind(
    as.data.frame(table(indiv$zone)),
    as.data.frame(table(indiv$age_category)),
    as.data.frame(table(indiv$Prejudice)),
    as.data.frame(table(indiv$Contact_type)),
    as.data.frame(table(indiv$Nb_encounters)),
    as.data.frame(table(indiv$duration)),
    as.data.frame(table(indiv$Population)),
    as.data.frame(table(indiv$sample))
  )

print(xtable(descriptive_exp,
             caption = "Descriptive statistics of experiments"), 
      hline.after = NULL,
      include.colnames = FALSE,
      add.to.row = list(pos = list(0, 6, 9, 18, 26, 29, 32, 37, nrow(descriptive_exp)),
                        command = c('\\toprule\n', '\\midrule\n', '\\midrule\n', '\\midrule\n', '\\midrule\n', 
                                    '\\midrule\n', '\\midrule\n', '\\midrule\n', '\\bottomrule\n')),
      include.rownames = FALSE
)



################################################################################ 
############################# Descriptive measures #############################
################################################################################ 

descriptive_measures <- 
  rbind(
    as.data.frame(table(all_measures$Outcome_type)),
    as.data.frame(table(all_measures$Behavior_type)),
    as.data.frame(table(all_measures$Same_people)),
    as.data.frame(table(all_measures$time))
    )

print(xtable(descriptive_measures,
             caption = "Descriptive statistics of measures"), 
      hline.after = NULL,
      include.colnames = FALSE,
      add.to.row = list(pos = list(0, 5, 7, nrow(descriptive_measures)),
                        command = c('\\toprule\n', '\\midrule\n', '\\midrule\n', '\\bottomrule\n')),
      include.rownames = FALSE
      )


################################################################################ 
########################## Histogram outcome variable ##########################
################################################################################ 


### Draw graph

hist(all_measures$effect, breaks = 20,
     main = '', xlab = 'Standardized effect size',
     xlim = range(-1,2),
     ylim = range(0,60),
)

abline(v=0, col='black', lwd=3)
abline(v=mean(all_measures$effect), col='red', lwd=3, lty='dashed')
text(mean(all_measures$effect)+.2, 60, paste("Mean = ", str_trunc(mean(all_measures$effect), 5, ellipsis = ""), sep = ""), col = 'red')


### Save graph

pdf(paste(pathfigures, "Distrib_effect.pdf", sep = "//"), width = 7, height = 5)

hist(all_measures$effect, breaks = 20,
     main = '', xlab = 'Standardized effect size',
     xlim = range(-1,2),
     ylim = range(0,60),
)

abline(v=0, col='black', lwd=3)
abline(v=mean(all_measures$effect), col='red', lwd=3, lty='dashed')
text(mean(all_measures$effect)+.2, 60, paste("Mean = ", str_trunc(mean(all_measures$effect), 5, ellipsis = ""), sep = ""), col = 'red')

dev.off()


################################################################################ 
########################### Descriptive table outcome ##########################
################################################################################ 


# Summary statistics
table_outcome_summary <- all_measures %>%
  dplyr::summarize(
    Var    = "Standardized treatment effect",
    N      = n(),
    Mean   = mean(effect, na.rm = TRUE),
    SD     = sd(effect, na.rm = TRUE),
    Q1     = quantile(effect, 0.25, na.rm = TRUE),
    Median = median(effect, na.rm = TRUE),
    Q3     = quantile(effect, 0.75, na.rm = TRUE),
    Min    = min(effect, na.rm = TRUE),
    Max    = max(effect, na.rm = TRUE)
  )


print(xtable(table_outcome_summary,
             caption = "Descriptive statistics of outcome"), 
      hline.after = NULL,
      include.colnames = TRUE,
      include.rownames = FALSE
)



################################################################################ 
######################## Descriptive Allport conditions ########################
################################################################################ 


var_labels <- c(
  mean_common_1 = "Mean common goal 1",
  mean_equal_1 = "Mean equal status 1",
  mean_positive_1  = "Mean positive encounter 1", 
  mean_support_1 = "Mean support of authorities 1", 
  mean_friendship_1 = "Mean friendship potential 1", 
  mean_common_2 = "Mean common goal 2",
  mean_equal_2 = "Mean equal status 2",
  mean_positive_2  = "Mean positive encounter 2", 
  mean_support_2 = "Mean support of authorities 2", 
  mean_friendship_2 = "Mean friendship potential 2", 
  common_GJ = "Common goal author", 
  equal_GJ = "Equal status author", 
  positive_GJ = "Positive encounter author", 
  support_GJ = "Support of authorities author", 
  friendship_GJ = "Friendship potential author",
  common_gpt = "Common goal GPT", 
  equal_gpt = "Equal status GPT", 
  positive_gpt = "Positive contact GPT", 
  support_gpt = "Support of authorities GPT", 
  friendship_gpt = "Friendship potential GPT"
)

vars_order <- names(var_labels)

descriptive_allport <- indiv %>%
  dplyr::select(all_of(vars_order)) %>%
  tidyr::pivot_longer(
    cols = everything(),
    names_to = "Var",
    values_to = "value"
  ) %>%
  dplyr::mutate(
    Var = factor(var_labels[Var], levels = var_labels)
  ) %>%
  dplyr::group_by(Var) %>%
  dplyr::summarize(
    N      = sum(!is.na(value)),
    Mean   = mean(value, na.rm = TRUE),
    SD     = sd(value, na.rm = TRUE),
    .groups = "drop"
  )


print(xtable(descriptive_allport,
             caption = "Descriptive statistics of conditions"), 
      hline.after = NULL,
      include.colnames = TRUE,
      include.rownames = FALSE
)



################################################################################ 
######################### Pairwise correlation Allport #########################
################################################################################ 


##### First experiment #####

var_labels <- c(
  mean_common_1     = "Common goal",
  mean_equal_1      = "Equal status",
  mean_positive_1   = "Positive encounter",
  mean_support_1    = "Support of authorities",
  mean_friendship_1 = "Friendship potential"
)

vars_order <- names(var_labels)

data_sel <- indiv %>%
  dplyr::select(all_of(vars_order))


cor_mat  <- matrix(NA, ncol = ncol(data_sel), nrow = ncol(data_sel))
p_mat    <- matrix(NA, ncol = ncol(data_sel), nrow = ncol(data_sel))

for(i in seq_len(ncol(data_sel))) {
  for(j in seq_len(ncol(data_sel))) {
    test <- cor.test(data_sel[[i]], data_sel[[j]],
                     use = "pairwise.complete.obs")
    cor_mat[i, j] <- test$estimate
    p_mat[i, j]   <- test$p.value
  }
}

stars <- function(p) {
  ifelse(p < .005, "***",
         ifelse(p < .01,  "**",
                ifelse(p < .05,  "*", "")))
}

cor_final <- matrix("", ncol = ncol(cor_mat), nrow = nrow(cor_mat))

for(i in seq_len(ncol(cor_mat))) {
  for(j in seq_len(ncol(cor_mat))) {
    if(i >= j) {  # keep diagonal + lower triangle only
      cor_final[i, j] <- sprintf("%.3f%s",
                                 cor_mat[i, j],
                                 stars(p_mat[i, j]))
    }
  }
}

cor_final <- as.data.frame(cor_final)

colnames(cor_final) <- var_labels
rownames(cor_final) <- var_labels

print(xtable(cor_final,
             caption = "Pairwise correlation first experiment"), 
      hline.after = NULL,
      include.colnames = TRUE,
      include.rownames = TRUE
)



##### Second experiment #####

var_labels <- c(
  mean_common_2     = "Common goal",
  mean_equal_2      = "Equal status",
  mean_positive_2   = "Positive encounter",
  mean_support_2    = "Support of authorities",
  mean_friendship_2 = "Friendship potential"
)

vars_order <- names(var_labels)

data_sel <- indiv %>%
  dplyr::select(all_of(vars_order))


cor_mat  <- matrix(NA, ncol = ncol(data_sel), nrow = ncol(data_sel))
p_mat    <- matrix(NA, ncol = ncol(data_sel), nrow = ncol(data_sel))

for(i in seq_len(ncol(data_sel))) {
  for(j in seq_len(ncol(data_sel))) {
    test <- cor.test(data_sel[[i]], data_sel[[j]],
                     use = "pairwise.complete.obs")
    cor_mat[i, j] <- test$estimate
    p_mat[i, j]   <- test$p.value
  }
}

stars <- function(p) {
  ifelse(p < .005, "***",
         ifelse(p < .01,  "**",
                ifelse(p < .05,  "*", "")))
}

cor_final <- matrix("", ncol = ncol(cor_mat), nrow = nrow(cor_mat))

for(i in seq_len(ncol(cor_mat))) {
  for(j in seq_len(ncol(cor_mat))) {
    if(i >= j) {  # keep diagonal + lower triangle only
      cor_final[i, j] <- sprintf("%.3f%s",
                                 cor_mat[i, j],
                                 stars(p_mat[i, j]))
    }
  }
}

cor_final <- as.data.frame(cor_final)

colnames(cor_final) <- var_labels
rownames(cor_final) <- var_labels

print(xtable(cor_final,
             caption = "Pairwise correlation second experiment"), 
      hline.after = NULL,
      include.colnames = TRUE,
      include.rownames = TRUE
)


################################################################################ 
########################## Correlation two experiments #########################
################################################################################ 

### Correlations

# Common goal
lm1 <- 
  lm(
    data = indiv, 
    formula = mean_common_1 ~ mean_common_2
  )
summary(lm1)

# Equal status
lm2 <- 
  lm(
    data = indiv, 
    formula = mean_equal_1 ~ mean_equal_2
  )
summary(lm2)


# Positive encounter
lm3 <- 
  lm(
    data = indiv, 
    formula = mean_positive_1 ~ mean_positive_2
  )
summary(lm3)


# Support of authorities
lm4 <- 
  lm(
    data = indiv, 
    formula = mean_support_1 ~ mean_support_2
  )
summary(lm4)


# Friendship potential
lm5 <- 
  lm(
    data = indiv, 
    formula = mean_friendship_1 ~ mean_friendship_2
  )
summary(lm5)


### Exporting table

collabels = c("Common goal", "Equal status", "Positive encounter", "Support of authorities", "Friendship potential")

covlabels = c("Common goal 2", "Equal status 2", "Positive encounter 2", "Support of authorities 2", "Friendship potential 2")

notes = "The dependent variables are the means for each variable for the Prolific sample for the first experiment (without incentives) and the independent variables are the similar means for the second experiment (with incentives). * p < 0.05, ** p < 0.01, *** p < 0.005. Standard errors in parentheses."

title = "Consistency of outcomes in the two experiments"

label = "tab: consistency"

stargazer(lm1, lm2, lm3, lm4, lm5,
          type='latex', 
          table.placement = "htbp",
          title = title,
          style = "all2",
          label = label,
          # align = TRUE,
          column.labels = collabels,
          covariate.labels = covlabels,
          # keep = keeps,
          keep.stat = c("rsq", "n"),
          summary=FALSE,
          dep.var.caption = NULL,
          mean.sd = TRUE,
          digits = 3,
          notes = notes,
          out = paste(pathtables, "Correlation_two_experiments.tex", sep="//"),
          notes.append = FALSE,
          notes.align = "l",
          header = FALSE, 
          star.cutoffs = c(0.05, 0.01, 0.005)
)


################################################################################ 
########################## Correlation experiment GPT ##########################
################################################################################ 

### Correlations

# Common goal
lm1 <- 
  lm(
    data = indiv, 
    formula = mean_common_2 ~ common_gpt
  )
summary(lm1)

# Equal status
lm2 <- 
  lm(
    data = indiv, 
    formula = mean_equal_2 ~ equal_gpt
  )
summary(lm2)


# Positive encounter
lm3 <- 
  lm(
    data = indiv, 
    formula = mean_positive_2 ~ positive_gpt
  )
summary(lm3)


# Support of authorities
lm4 <- 
  lm(
    data = indiv, 
    formula = mean_support_2 ~ support_gpt
  )
summary(lm4)


# Friendship potential
lm5 <- 
  lm(
    data = indiv, 
    formula = mean_friendship_2 ~ friendship_gpt
  )
summary(lm5)


### Exporting table

collabels = c("Common goal", "Equal status", "Positive encounter", "Support of authorities", "Friendship potential")

covlabels = c("Common goal GPT", "Equal status GPT", "Positive encounter GPT", "Support of authorities GPT", "Friendship potential GPT")

notes = "The dependent variables are the means for each variable for the Prolific sample. * p < 0.05, ** p < 0.01, *** p < 0.005. Standard errors in parentheses."

title = "Consistency of outcomes between the experiments and ChatGPT"

label = "tab: comparison experiment GPT"

stargazer(lm1, lm2, lm3, lm4, lm5,
          type='latex', 
          table.placement = "htbp",
          title = title,
          style = "all2",
          label = label,
          # align = TRUE,
          column.labels = collabels,
          covariate.labels = covlabels,
          # keep = keeps,
          keep.stat = c("rsq", "n"),
          summary=FALSE,
          dep.var.caption = NULL,
          mean.sd = TRUE,
          digits = 3,
          notes = notes,
          out = paste(pathtables, "Correlation_exp_GPT.tex", sep="//"),
          notes.append = FALSE,
          notes.align = "l",
          header = FALSE, 
          star.cutoffs = c(0.05, 0.01, 0.005)
)


################################################################################ 
########################## Correlation experiment own ##########################
################################################################################ 

### Correlations

# Common goal
lm1 <- 
  lm(
    data = indiv, 
    formula = mean_common_2 ~ common_GJ
  )
summary(lm1)

# Equal status
lm2 <- 
  lm(
    data = indiv, 
    formula = mean_equal_2 ~ equal_GJ
  )
summary(lm2)


# Positive encounter
lm3 <- 
  lm(
    data = indiv, 
    formula = mean_positive_2 ~ positive_GJ
  )
summary(lm3)


# Support of authorities
lm4 <- 
  lm(
    data = indiv, 
    formula = mean_support_2 ~ support_GJ
  )
summary(lm4)


# Friendship potential
lm5 <- 
  lm(
    data = indiv, 
    formula = mean_friendship_2 ~ friendship_GJ
  )
summary(lm5)


### Exporting table

collabels = c("Common goal", "Equal status", "Positive encounter", "Support of authorities", "Friendship potential")

covlabels = c("Common goal author", "Equal status author", "Positive encounter author", "Support of authorities author", "Friendship potential author")

notes = "The dependent variables are the means for each variable for the Prolific sample. The independent variables are the author's own evaluations. * p < 0.05, ** p < 0.01, *** p < 0.005. Standard errors in parentheses."

title = "Consistency of outcomes between the experiments and author's own evaluation"

label = "tab: comparison experiment GJ"

stargazer(lm1, lm2, lm3, lm4, lm5,
          type='latex', 
          table.placement = "htbp",
          title = title,
          style = "all2",
          label = label,
          # align = TRUE,
          column.labels = collabels,
          covariate.labels = covlabels,
          # keep = keeps,
          keep.stat = c("rsq", "n"),
          summary=FALSE,
          dep.var.caption = NULL,
          mean.sd = TRUE,
          digits = 3,
          notes = notes,
          out = paste(pathtables, "Correlation_exp_GJ.tex", sep="//"),
          notes.append = FALSE,
          notes.align = "l",
          header = FALSE, 
          star.cutoffs = c(0.05, 0.01, 0.005)
)


################################################################################ 
############################## Correlation own GPT #############################
################################################################################ 

### Correlations

# Common goal
lm1 <- 
  lm(
    data = indiv, 
    formula = common_gpt ~ common_GJ
  )
summary(lm1)

# Equal status
lm2 <- 
  lm(
    data = indiv, 
    formula = equal_gpt ~ equal_GJ
  )
summary(lm2)


# Positive encounter
lm3 <- 
  lm(
    data = indiv, 
    formula = positive_gpt ~ positive_GJ
  )
summary(lm3)


# Support of authorities
lm4 <- 
  lm(
    data = indiv, 
    formula = support_gpt ~ support_GJ
  )
summary(lm4)


# Friendship potential
lm5 <- 
  lm(
    data = indiv, 
    formula = friendship_gpt ~ friendship_GJ
  )
summary(lm5)


### Exporting table

collabels = c("Common goal GPT", "Equal status GPT", "Positive encounter GPT", "Support of authorities GPT", "Friendship potential GPT")

covlabels = c("Common goal author", "Equal status author", "Positive encounter author", "Support of authorities author", "Friendship potential author")

notes = "The dependent variables are the values given for each condition by ChatGPT. The independent variables are the author's own evaluation. * p < 0.05, ** p < 0.01, *** p < 0.005. Standard errors in parentheses."

title = "Consistency of outcomes between ChatGPT and author's evaluation"

label = "tab: comparison GPT GJ"

stargazer(lm1, lm2, lm3, lm4, lm5,
          type='latex', 
          table.placement = "htbp",
          title = title,
          style = "all2",
          label = label,
          # align = TRUE,
          column.labels = collabels,
          covariate.labels = covlabels,
          # keep = keeps,
          keep.stat = c("rsq", "n"),
          summary=FALSE,
          dep.var.caption = NULL,
          mean.sd = TRUE,
          digits = 3,
          notes = notes,
          out = paste(pathtables, "Correlation_GPT_GJ.tex", sep="//"),
          notes.append = FALSE,
          notes.align = "l",
          header = FALSE, 
          star.cutoffs = c(0.05, 0.01, 0.005)
)


################################################################################ 
######################### Other descriptive statistics #########################
################################################################################ 


### Number of papers published after 2019

indiv$after_paluck <- as.numeric(indiv$Year > 2019)
table(indiv$after_paluck)


### Total and mean number of subjects

sum(indiv$N)
median(indiv$N)


### Observations per paper
# Number of observations per paper
cat("Number of obs. per paper\n")
cat(sprintf(" - mean  : %1.2f\n", mean(indiv$nb_measures)))
cat(sprintf(" - median: %d\n", median(indiv$nb_measures)))
cat(sprintf(" - max   : %d\n\n", max(indiv$nb_measures)))


### Number of negative contact effects

all_measures$negative <- as.numeric(all_measures$effect < 0)
table(all_measures$negative)

all_measures$neg_sign <- 
  as.numeric(all_measures$effect < 0 & 
               abs(all_measures$effect)/all_measures$se_effect >1.96)
table(all_measures$neg_sign)


### Papers claiming to fulfill some of Allport's conditions

table(indiv$any_conditions)
table(indiv$mention_common)
table(indiv$mention_equal)
table(indiv$mention_positive)
table(indiv$mention_support)
table(indiv$mention_friendship)

vars_check <- c(
  "mention_common",
  "mention_equal",
  "mention_positive",
  "mention_support",
  "mention_friendship"
)

indiv <- indiv %>%
  mutate(
    n_not_mentioned = rowSums(
      select(., all_of(vars_check)) == "Not mentioned",
      na.rm = TRUE
    )
  )

table(indiv$n_not_mentioned)


### Min and max common goal

summary(indiv$mean_common_2)


### Number of experiments in the US

table(indiv$Country)


################################################################################ 
################################ Clearing memory ###############################
################################################################################ 

rm(all_measures, indiv,
   descriptive_exp, descriptive_measures, descriptive_publication, 
   table_trends, 
   table_outcome_summary, plt, 
   lm1, lm2, lm3, lm4, lm5, 
   collabels, covlabels, notes, title, label, vars_check, 
   data_sel, p_mat, cor_final, descriptive_allport, test, cor_mat, 
   i,j, var_labels, vars_order
   )

